/*
Programmer: McNeel
Date: 03/30/2004
This program creates a SAS transport dataset of diet variables
  plus HHX, FMX, and PX identifier variables from the 2000 NHIS data.
*/
***Write the SAS statements generated by macro execution to the log;
options mprint;

***MODIFY THE FOLLOWING LINES AS APPROPRIATE FOR YOUR LOCAL ENVIRONMENT.;
***AT THE END YOU SHOULD HAVE A DATASET NAMED WORK.NHIS CONTAINING THE 2000 NHIS SAMADULT DATA.;
  ***Output dataset - the transport file produced at the end of the program is;
  ***written through a gzip pipe to this path;
  filename OUTFILE1 pipe 'gzip -c > /prj/arb/nhis/thompson/dietvars.2004_03_30.v8x.gz';
  
  ***This dataset has selected variables from the 2000 NHIS SAMADULT dataset;
  ***It is read through a gunzip pipe, so the compressed file is streamed rather than unpacked on disk;
  filename FILE1 pipe 'gunzip -c /prj/arb/nhis/thompson/diet.sst.gz';
  
  ***Formats for the dataset;
  %include '/prj/arb/nhis/thompson/formats02.diet.sas';
  
  ***Restore the transport file read from FILE1 as the dataset WORK.NHIS;
  proc cimport infile=FILE1 data=nhis;
***MODIFY THE PRECEDING LINES AS APPROPRIATE FOR YOUR LOCAL ENVIRONMENT.;

***Generates one IF statement that blanks out an implausible or unusable frequency report for one food.        ;
***  food     - Base of the food variable names, for example ColdC for the pair ColdCTP and ColdCNo            ;
***  daylimit - Largest count accepted when the time period code is 1                                          ;
***Both variables of the pair are set to missing when any of the following holds                               ;
***  - the time period code is 7, 8 or 9                                                                       ;
***  - the time period code is 1 and the count exceeds daylimit                                                ;
***  - the time period code is 2 and the count exceeds 14                                                      ;
***  - the time period code is 3 and the count exceeds 60                                                      ;
***No upper bound is applied when the time period code is 4.                                                   ;
***Must be called inside a DATA step.                                                                          ;
%macro SetExtremeValuesToMissing(food, daylimit);
  if &food.TP in (7, 8, 9)
     or (&food.TP = 1 and &food.No > &daylimit)
     or (&food.TP = 2 and &food.No > 14)
     or (&food.TP = 3 and &food.No > 60)
  then do;
    &food.TP = .;
    &food.No = .;
  end;
%mend SetExtremeValuesToMissing;

***Clean the reported food frequencies, split milk by kind, convert every food to;
***times per day and assign the age group used to look up portion sizes;
data nhis;
  set nhis;

  ***Milk kind codes 7, 8 and 9 (presumably nonresponse codes - confirm against the;
  ***NHIS codebook) make the reported milk frequency unusable, so blank it out;
  if MilkKnd in (7, 8, 9) then do;
    MilkNo = .;
    MilkTP = .;
  end;

  ***Spread the single milk report over five kind-specific variable pairs.;
  ***Every kind starts at zero (not missing) and the reported kind, if any,;
  ***then receives the reported number and time period.;
  ***When MilkTP is missing all ten variables stay missing.;
  if MilkTP ne . then do;
    WholeMilkNo  = 0;
    WholeMilkTP  = 0;
    TwoPctMilkNo = 0;
    TwoPctMilkTP = 0;
    OnePctMilkNo = 0;
    OnePctMilkTP = 0;
    SkimMilkNo   = 0;
    SkimMilkTP   = 0;
    OtherMilkNo  = 0;
    OtherMilkTP  = 0;

    select (MilkKnd);
      when (1) do;
        WholeMilkNo = MilkNo;
        WholeMilkTP = MilkTP;
      end;
      when (2) do;
        TwoPctMilkNo = MilkNo;
        TwoPctMilkTP = MilkTP;
      end;
      when (3) do;
        OnePctMilkNo = MilkNo;
        OnePctMilkTP = MilkTP;
      end;
      ***Kinds 4 and 5 are both counted as skim milk;
      when (4, 5) do;
        SkimMilkNo = MilkNo;
        SkimMilkTP = MilkTP;
      end;
      when (6) do;
        OtherMilkNo = MilkNo;
        OtherMilkTP = MilkTP;
      end;
      ***Any other kind (including missing) leaves all five kinds at zero;
      otherwise;
    end;
  end;

  ***Blank out unusable or out-of-range reports, second argument is the per-day maximum;
  %SetExtremeValuesToMissing(ColdC,      10);
  %SetExtremeValuesToMissing(WholeMilk,  5);
  %SetExtremeValuesToMissing(TwoPctMilk, 6);
  %SetExtremeValuesToMissing(OnePctMilk, 6);
  %SetExtremeValuesToMissing(SkimMilk,   5);
  %SetExtremeValuesToMissing(OtherMilk,  5);
  %SetExtremeValuesToMissing(Bacon,      3);
  %SetExtremeValuesToMissing(HtDog,      2);
  %SetExtremeValuesToMissing(Bread,      5);
  %SetExtremeValuesToMissing(Juice,      4);
  %SetExtremeValuesToMissing(Fruit,      12);
  %SetExtremeValuesToMissing(Dress,      3);
  %SetExtremeValuesToMissing(Salad,      5);
  %SetExtremeValuesToMissing(Fries,      3);
  %SetExtremeValuesToMissing(Potat,      3);
  %SetExtremeValuesToMissing(Beans,      3);
  %SetExtremeValuesToMissing(OVeg,       9);
  %SetExtremeValuesToMissing(Pasta,      3);
  %SetExtremeValuesToMissing(PNut,       3);
  %SetExtremeValuesToMissing(Chips,      3);

  ***Convert each food to times per day.;
  ***The three arrays are parallel - element k of each refers to the same food;
  array aPeriod[*]
    ColdCTP WholeMilkTP TwoPctMilkTP OnePctMilkTP SkimMilkTP OtherMilkTP
    BaconTP HtDogTP BreadTP JuiceTP FruitTP DressTP SaladTP FriesTP
    PotatTP BeansTP OVegTP PastaTP PNutTP ChipsTP;
  array aCount[*]
    ColdCNo WholeMilkNo TwoPctMilkNo OnePctMilkNo SkimMilkNo OtherMilkNo
    BaconNo HtDogNo BreadNo JuiceNo FruitNo DressNo SaladNo FriesNo
    PotatNo BeansNo OVegNo PastaNo PNutNo ChipsNo;
  array aDaily[*]
    ColdCPerDay WholeMilkPerDay TwoPctMilkPerDay OnePctMilkPerDay SkimMilkPerDay OtherMilkPerDay
    BaconPerDay HtDogPerDay BreadPerDay JuicePerDay FruitPerDay DressPerDay SaladPerDay FriesPerDay
    PotatPerDay BeansPerDay OVegPerDay PastaPerDay PNutPerDay ChipsPerDay;
  do i = 1 to dim(aPeriod);
    ***Period code 0 means zero per day, codes 1 to 4 divide the count by 1, 7, 30 and 365 days.;
    ***Any other period (including missing) leaves the per-day value missing;
    select (aPeriod[i]);
      when (0) aDaily[i] = 0;
      when (1) aDaily[i] = aCount[i];
      when (2) aDaily[i] = aCount[i] / 7;
      when (3) aDaily[i] = aCount[i] / 30;
      when (4) aDaily[i] = aCount[i] / 365;
      otherwise;
    end;
  end;

  ***Ten-year age bands starting at 18, with 78 and over as the last band.;
  ***Ages outside every band (including missing) leave AgeGrpA missing;
  label AgeGrpA = 'Age group';
  select;
    when (18 <= Age_P <= 27) AgeGrpA = 1;
    when (28 <= Age_P <= 37) AgeGrpA = 2;
    when (38 <= Age_P <= 47) AgeGrpA = 3;
    when (48 <= Age_P <= 57) AgeGrpA = 4;
    when (58 <= Age_P <= 67) AgeGrpA = 5;
    when (68 <= Age_P <= 77) AgeGrpA = 6;
    when (Age_P >= 78)       AgeGrpA = 7;
    otherwise;
  end;
run;

***Input median portion size in pyramid servings per mention by gender and age for fruits and vegetables analyses,;
***from the USDA 1994-96 Continuing Survey of Food Intakes of Individuals (CSFII 94-96) dietary recall data.;
***One row per combination of Sex (1, 2) and AgeGrpA (1 to 7), entered in Sex/AgeGrpA order so the;
***table can be used in a BY merge without sorting.;
***FVPSTomSc and FVPSVegSoup are read here but are not referenced elsewhere in the code shown in this program.;
data FVPS;
  input Sex AgeGrpA FVPSFruit FVPSJuice FVPSFries FVPSPotat FVPSSalad FVPSOVeg FVPSBeans FVPSTomSc FVPSVegSoup;
  datalines;
1 1 1.301000 2.000000 2.000000 2.000000 0.545000 0.750000 1.374000 0.626000 1.144000 
1 2 1.301000 1.667500 2.000000 2.000000 0.708000 0.906000 1.047000 0.587000 1.286500 
1 3 1.229571 1.335000 1.773000 1.999000 0.754500 0.974500 1.065000 0.579000 1.246000 
1 4 1.227333 1.335000 1.710000 1.999000 0.750000 1.000000 1.227000 0.607000 1.286000 
1 5 1.168000 1.334000 1.400000 1.914000 0.833500 1.000000 1.000000 0.533600 1.507500 
1 6 1.168000 1.001000 1.250000 1.544000 0.750000 0.880000 1.000000 0.606667 1.268000 
1 7 1.052333 1.001000 1.250000 1.508000 0.822500 0.833333 1.114000 0.405000 1.180000 
2 1 1.168000 1.500500 1.481000 1.544000 0.613500 0.702200 0.964000 0.410000 1.234500 
2 2 1.168000 1.334000 1.365500 1.544000 0.572500 0.779333 0.684000 0.400000 0.906000 
2 3 1.168000 1.334000 1.272000 1.528000 0.833333 0.792500 0.800000 0.402000 1.507000 
2 4 1.168000 1.251250 1.400000 1.544000 1.000000 0.788500 0.687000 0.396000 1.131500 
2 5 1.150500 1.019500 1.000000 1.499000 0.795500 0.774000 0.822000 0.477500 1.246000 
2 6 1.083833 1.000500 1.026000 1.516000 0.625000 0.833000 0.807000 0.356000 1.159500 
2 7 1.000000 1.000500 1.000000 1.272000 0.750000 0.856750 1.000000 0.252000 1.017000 
;

***Input median portion size in grams per mention by gender and age for percent energy from fat and fiber analyses, from CSFII 94-96.;
***One row per combination of Sex (1, 2) and AgeGrpA (1 to 7), entered in Sex/AgeGrpA order so the;
***table can be used in a BY merge without sorting.;
***The values on each data line follow the order of the INPUT statement, so OthPSVegSoup is the;
***next-to-last value and OthPSOtherMilk is the last.;
***OthPSVegSoup is read here but is not referenced elsewhere in the code shown in this program.;
data OthPS;
  input Sex AgeGrpA OthPSJuice OthPSFruit OthPSSalad OthPSFries OthPSPotat OthPSBeans OthPSOVeg OthPSColdC OthPSWholeMilk OthPSTwoPctMilk 
    OthPSOnePctMilk OthPSSkimMilk OthPSBacon OthPSHtDog OthPSBread OthPSDress OthPSPasta OthPSPNut OthPSChips OthPSVegSoup OthPSOtherMilk;
  datalines;
1 1 372.000000 131.750000 29.000000 112.500000 210.000000 180.000000 60.013333 74.666667 305.000000 259.250000 341.600000 366.666667 25.000000 114.000000 56.000000 23.543333 330.000000 31.625000 40.000000 361.000000 163.340000 
1 2 311.250000 128.000000 36.666667 114.000000 196.000000 130.000000 73.000000 61.500000 259.250000 305.000000 245.000000 250.000000 40.250000  85.500000 54.000000 23.626667 280.000000 58.000000 40.000000 428.750000 163.340000 
1 3 249.000000 123.200000 41.250000 100.000000 184.000000 172.000000 74.063333 57.500000 306.710000 244.000000 245.000000 250.000000 32.000000  88.000000 52.000000 22.030000 280.000000 35.500000 31.895000 361.500000 163.340000 
1 4 249.000000 127.500000 35.000000 100.000000 161.000000 172.000000 79.833333 56.000000 244.000000 244.000000 244.000000 245.000000 32.000000 114.000000 52.000000 27.500000 247.500000 54.665000 30.000000 361.500000 163.340000 
1 5 248.000000 122.000000 42.000000  85.500000 145.000000 158.125000 76.500000 46.000000 244.000000 244.000000 213.500000 214.375000 27.000000  57.000000 51.000000 24.585000 280.000000 39.250000 26.000000 361.500000 163.340000 
1 6 186.750000 118.000000 41.250000  85.500000 127.000000 175.000000 73.000000 39.000000 244.000000 183.000000 223.666667 198.937500 26.000000  57.000000 48.250000 19.285000 210.000000 17.130000 21.000000 321.750000 163.340000 
1 7 186.750000 114.250000 44.666667  97.000000 107.000000 170.100000 67.520909 33.000000 203.333333 183.000000 183.000000 160.725000 24.000000  57.000000 48.000000 15.600000 210.000000 35.916667 17.500000 331.990000 163.340000 
2 1 280.125000 118.000000 31.500000  79.500000 122.000000 126.500000 53.750000 50.000000 244.000000 244.000000 244.000000 245.000000 26.000000  57.000000 50.000000 17.140000 217.500000 18.000000 28.000000 335.500000  61.000000 
2 2 249.000000 118.000000 30.940000  70.000000 127.000000  89.000000 61.625000 49.500000 244.000000 244.000000 244.000000 245.000000 25.000000  57.000000 48.000000 20.626667 217.500000 32.000000 24.333333 352.500000  61.000000 
2 3 248.800000 118.000000 44.250000  70.000000 116.000000 126.500000 61.500000 44.000000 244.000000 244.000000 183.000000 244.800000 24.000000  57.000000 47.500000 23.020000 182.525000 20.655000 27.000000 361.500000  61.000000 
2 4 233.250000 118.000000 51.875000  70.000000 122.000000 126.500000 61.532500 43.500000 244.000000 244.000000 152.500000 229.690000 24.000000 114.000000 45.000000 21.873333 185.000000 21.265000 26.000000 244.000000  61.000000 
2 5 189.755000 118.000000 41.250000  66.000000 105.000000 126.500000 63.165000 33.000000 198.250000 183.000000 183.000000 196.000000 18.000000  57.000000 45.000000 22.035000 165.000000 18.250000 20.000000 300.250000  61.000000 
2 6 186.600000 112.427143 33.666667  70.000000 105.000000 126.500000 67.142857 33.000000 198.250000 183.000000 183.000000 183.750000 19.500000  57.000000 42.400000 18.335000 160.000000 11.250000 18.000000 244.000000  61.000000 
2 7 186.700000 109.000000 41.250000  64.000000 105.000000 173.000000 71.333333 33.500000 196.400000 152.500000 218.583333 183.750000 16.000000  57.000000 34.000000 10.210000 175.000000 25.500000 14.000000 244.000000  61.000000 
;

***Put NHIS in Sex/AgeGrpA order, the BY variables of the portion-size merge that follows;
proc sort data=nhis out=nhis;
  by Sex AgeGrpA;
run;

***Add the portion size variables to the NHIS dataset and estimate daily intake.;
***FVPS and OthPS are matched to each NHIS row on Sex and AgeGrpA, and the subsetting IF;
***keeps only rows that came from NHIS.;
data nhis;
  merge nhis(in=InNHIS) FVPS OthPS;
  by Sex AgeGrpA;
  if InNHIS;

  ***The + operator is used instead of the SUM function (or a sum statement) so that the;
  ***final values will be missing if any of the food variables are missing;

  ***Estimate daily servings of fruit and vegetables (sum over foods of reported ;
  ***daily frequency * portion size for the sex/age group).;
  ***NOTE(review): this comment originally said mean portion size, while the step that;
  ***inputs FVPS describes the values as medians - confirm which wording is correct;
  FVAllServ = JuicePerDay*FVPSJuice + FruitPerDay*FVPSFruit
    + SaladPerDay*FVPSSalad + FriesPerDay*FVPSFries
    + PotatPerDay*FVPSPotat + BeansPerDay*FVPSBeans
    + OVegPerDay*FVPSOVeg;

  ***Apply estimated regression coefficients for sum of foods predicting servings of total ;
  ***fruits and vegetables (from CSFII 94-96).;
  ***The regression is linear in the square root of the summed servings and the result is;
  ***squared to return to servings. FVAll stays missing when Sex is neither 1 nor 2;
  label FVAll = 'Daily servings of all fruits & vegetables (03/30/2004)';
  if      Sex=1 then FVAll = ( 0.906793 + 0.758560*(sqrt(FVAllServ)) )**2;
  else if Sex=2 then FVAll = ( 0.819559 + 0.730865*(sqrt(FVAllServ)) )**2;

  ***Square-root scale value, used later for the variance adjustment;
  label SqRtFVAll = 'Square root of daily servings of all fruits & vegetables (03/30/2004)';
  SqRtFVAll = sqrt(FVAll);

  ***Estimate daily servings of fruit and vegetables except French fries ;
  ***(sum over foods of reported daily frequency * portion size for the sex/age group).;
  ***Same terms as FVAllServ with the French fries term left out;
  FVNoFFServ = JuicePerDay*FVPSJuice + FruitPerDay*FVPSFruit
    + SaladPerDay*FVPSSalad + PotatPerDay*FVPSPotat
    + BeansPerDay*FVPSBeans + OVegPerDay*FVPSOVeg;

  ***Apply estimated regression coefficients for sum of foods predicting servings of total ;
  ***fruits and vegetables excluding French fries (from CSFII 94-96);
  label FVNoFF = 'Daily servings of fruits & vegetables except French fries (03/30/2004)';
  if      Sex=1 then FVNoFF = ( 0.940772 + 0.739056*(sqrt(FVNoFFServ)) )**2;
  else if Sex=2 then FVNoFF = ( 0.816265 + 0.730219*(sqrt(FVNoFFServ)) )**2;

  ***Square-root scale value, used later for the variance adjustment;
  label SqRtFVNoFF = 'Square root of daily servings of all fruits & vegetables except French fries (03/30/2004)';
  SqRtFVNoFF = sqrt(FVNoFF);

  ***Estimate grams of each food that contributes to PEFat and fiber estimations ;
  ***(daily frequency * portion size in grams for the sex/age group).;
  ***The three arrays are parallel - element k of each refers to the same food - so the;
  ***variable lists must be kept in the same order.;
  ***SaladGrams is computed here but does not appear in either equation below;
  array aFoodPerDay[*] JuicePerDay FruitPerDay SaladPerDay FriesPerDay PotatPerDay
    BeansPerDay OVegPerDay ColdCPerDay WholeMilkPerDay TwoPctMilkPerDay
    OnePctMilkPerDay SkimMilkPerDay BaconPerDay HtDogPerDay BreadPerDay
    DressPerDay PastaPerDay PNutPerDay ChipsPerDay OtherMilkPerDay;
  array aOthPS[*] OthPSJuice OthPSFruit OthPSSalad OthPSFries OthPSPotat 
    OthPSBeans OthPSOVeg OthPSColdC OthPSWholeMilk OthPSTwoPctMilk 
    OthPSOnePctMilk OthPSSkimMilk OthPSBacon OthPSHtDog OthPSBread
    OthPSDress OthPSPasta OthPSPNut OthPSChips OthPSOtherMilk;
  array aFoodGrams[*] JuiceGrams FruitGrams SaladGrams FriesGrams PotatGrams
    BeansGrams OVegGrams ColdCGrams WholeMilkGrams TwoPctMilkGrams
    OnePctMilkGrams SkimMilkGrams BaconGrams HtDogGrams BreadGrams
    DressGrams PastaGrams PNutGrams ChipsGrams OtherMilkGrams;
  do i = 1 to dim(aFoodPerDay);
    aFoodGrams[i] = aFoodPerDay[i] * aOthPS[i];
  end;

  ***Apply estimated regression coefficients for sum of foods predicting percent energy from fat (from CSFII 94-96).;
  ***Separate intercept and coefficients for Sex 1 and Sex 2, with no transformation of the outcome.;
  ***PEFat is missing if any food in the equation is missing or Sex is neither 1 nor 2;
  label PEFat = 'Percent energy from fat (03/30/2004)';
  if Sex=1 then PEFat = 31.952688 - 0.025863*ColdCGrams + 0.006512*WholeMilkGrams + 0.002045*TwoPctMilkGrams 
    - 0.001500*OnePctMilkGrams - 0.008536*SkimMilkGrams - 0.026527*OtherMilkGrams + 0.138079*BaconGrams
    + 0.041135*HtDogGrams - 0.005354*JuiceGrams - 0.009290*FruitGrams + 0.149646*DressGrams + 0.027202*FriesGrams
    + 0.005688*PotatGrams - 0.005231*BeansGrams - 0.005029*PastaGrams + 0.125658*PNutGrams + 0.053905*ChipsGrams;
  else if Sex=2 then PEFat = 31.371597 - 0.057206*ColdCGrams + 0.008364*WholeMilkGrams + 0.002634*TwoPctMilkGrams 
    - 0.002082*OnePctMilkGrams - 0.008775*SkimMilkGrams - 0.047247*OtherMilkGrams + 0.231718*BaconGrams
    + 0.101657*HtDogGrams - 0.010099*JuiceGrams - 0.011894*FruitGrams + 0.238928*DressGrams + 0.042656*FriesGrams
    + 0.006148*PotatGrams - 0.005934*BeansGrams - 0.005416*PastaGrams + 0.260003*PNutGrams + 0.130953*ChipsGrams;

  ***Apply estimated regression coefficients for sum of foods predicting grams of fiber (from CSFII 94-96).;
  ***The equations produce CubeRtFiber, which is cubed at the end of the step to give Fiber in grams.;
  ***NOTE(review): the Sex=1 equation has no HtDogGrams term while the Sex=2 equation does, so a;
  ***missing hot dog report makes Fiber missing only when Sex=2 - confirm against the published coefficients;
  label Fiber = 'Daily grams of fiber (03/30/2004)';
  if Sex=1 then CubeRtFiber = 2.083624 + 0.002045*ColdCGrams + 0.000129*WholeMilkGrams + 0.000137*TwoPctMilkGrams 
    + 0.000225*OnePctMilkGrams + 0.000285*SkimMilkGrams + 0.001474*OtherMilkGrams - 0.001377*BaconGrams
    + 0.002827*BreadGrams + 0.000187*JuiceGrams + 0.001031*FruitGrams + 0.001603*FriesGrams + 0.000716*PotatGrams 
    + 0.002748*BeansGrams + 0.000839*OVegGrams + 0.000753*PastaGrams + 0.005401*PNutGrams + 0.005276*ChipsGrams;
  else if Sex=2 then CubeRtFiber = 1.898219 + 0.003843*ColdCGrams + 0.000095*WholeMilkGrams + 0.000115*TwoPctMilkGrams 
    + 0.000245*OnePctMilkGrams + 0.000344*SkimMilkGrams + 0.003496*OtherMilkGrams - 0.002043*BaconGrams 
    - 0.001416*HtDogGrams + 0.003373*BreadGrams + 0.000246*JuiceGrams + 0.001046*FruitGrams + 0.001563*FriesGrams 
    + 0.000658*PotatGrams + 0.003787*BeansGrams + 0.000932*OVegGrams + 0.000826*PastaGrams + 0.006053*PNutGrams 
    + 0.004592*ChipsGrams;
  Fiber = CubeRtFiber**3;
run;

***Weighted mean (weight WtFA_SA) of each transformed estimate within Sex.;
***One pass over NHIS, with one OUTPUT statement per result dataset so that each of;
***FVAllMeans, FVNoFFMeans, FiberMeans and PEFatMeans holds Sex plus a single mean.;
***Each mean is computed from the nonmissing values of its own variable only;
proc summary data=nhis nway;
  class Sex;
  weight WtFA_SA;
  var SqRtFVAll SqRtFVNoFF CubeRtFiber PEFat;
  output out=FVAllMeans  mean(SqRtFVAll)   = SqRtFVAllMeanBySex;
  output out=FVNoFFMeans mean(SqRtFVNoFF)  = SqRtFVNoFFMeanBySex;
  output out=FiberMeans  mean(CubeRtFiber) = CubeRtFiberMeanBySex;
  output out=PEFatMeans  mean(PEFat)       = PEFatMeanBySex;
run;

***Put NHIS and the four tables of means in Sex order for the BY merge that follows;
proc sort data=nhis;
  by Sex;
run;

proc sort data=FVAllMeans;
  by Sex;
run;

proc sort data=FVNoFFMeans;
  by Sex;
run;

proc sort data=FiberMeans;
  by Sex;
run;

proc sort data=PEFatMeans;
  by Sex;
run;

***Attach the sex-specific weighted means to every person and compute the variance-adjusted estimates.;
***Only the identifiers and the eight final estimates are kept;
data nhis(keep=HHX FMX PX FVAll FVAllAdj FVNoFF FVNoFFAdj PEFat PEFatAdj Fiber FiberAdj);
  merge
    nhis
    FVAllMeans (keep=Sex SqRtFVAllMeanBySex)
    FVNoFFMeans(keep=Sex SqRtFVNoFFMeanBySex)
    FiberMeans (keep=Sex CubeRtFiberMeanBySex)
    PEFatMeans (keep=Sex PEFatMeanBySex)
    ;
  by Sex;

  label
    FVAllAdj  = 'Variance-adjusted daily servings of all fruits & vegetables (03/30/2004)'
    FVNoFFAdj = 'Variance-adjusted daily servings of fruits & vegetables except French fries (03/30/2004)'
    PEFatAdj  = 'Variance-adjusted percent energy from fat (03/30/2004)'
    FiberAdj  = 'Variance-adjusted daily grams of fiber (03/30/2004)'
    ;

  ***Apply variance adjustment factors from the Observing Protein and Energy Nutrition Study (OPEN).;
  ***Each adjustment multiplies the distance of a person from the sex-specific mean by the factor,;
  ***on the scale the estimate was modeled on, and then undoes the transformation -;
  ***square for the fruit and vegetable estimates, cube for fiber, none for percent energy from fat.;
  ***The adjusted values stay missing when Sex is neither 1 nor 2;
  select (Sex);
    when (1) do;
      FVAllAdj  = (1.3*(SqRtFVAll - SqRtFVAllMeanBySex) + SqRtFVAllMeanBySex)**2;
      FVNoFFAdj = (1.3*(SqRtFVNoFF - SqRtFVNoFFMeanBySex) + SqRtFVNoFFMeanBySex)**2;
      PEFatAdj  = 1.5*(PEFat - PEFatMeanBySex) + PEFatMeanBySex;
      FiberAdj  = (1.2*(CubeRtFiber - CubeRtFiberMeanBySex) + CubeRtFiberMeanBySex)**3;
    end;
    when (2) do;
      FVAllAdj  = (1.1*(SqRtFVAll - SqRtFVAllMeanBySex) + SqRtFVAllMeanBySex)**2;
      FVNoFFAdj = (1.2*(SqRtFVNoFF - SqRtFVNoFFMeanBySex) + SqRtFVNoFFMeanBySex)**2;
      PEFatAdj  = 1.3*(PEFat - PEFatMeanBySex) + PEFatMeanBySex;
      FiberAdj  = (1.2*(CubeRtFiber - CubeRtFiberMeanBySex) + CubeRtFiberMeanBySex)**3;
    end;
    otherwise;
  end;

  ***Round every output estimate to six decimal places;
  array aEstimates[*] FVAll FVAllAdj FVNoFF FVNoFFAdj PEFat PEFatAdj Fiber FiberAdj;
  do i = 1 to dim(aEstimates);
    aEstimates[i] = round(aEstimates[i], .000001);
  end;
run;

***Order the final dataset by the household, family and person identifiers;
proc sort data=nhis;
  by hhx fmx px;
run;

***List the variables of the final dataset in the output for reference;
proc contents data=nhis;
run;

***Write the final dataset as a SAS transport file to OUTFILE1.;
***Each step now ends with an explicit RUN so that the export does not depend on the end of the;
***program file (or a later submission) to supply the step boundary that makes it execute;
proc cport data=nhis file=OUTFILE1;
run;
